---
title: "R Notebook"
output: html_notebook
---


Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

```{r}

rm(list = ls())

library(tidyverse)
library(readr)
library(reldist)
library(openxlsx)
library(spatstat)
library(quantreg)
library(ggthemes)
library(scales)

setwd("C:/Users/PeterLimerick/Desktop/CAPITA-master/Output analysis/")

weighted.geomean <- function(x, w, ...) exp(weighted.mean(log(x), w, ...))


```

# Getting outfiles in shape

```{r}
outfile_base <- read_csv("CAPITA_OUTFILE_BASE.csv")

select <- outfile_base %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("Weight"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^IUID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Base")

select_nd <- unique(select$IUID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(IUWeightSu = weight)

labour_base <- labour

remove(outfile_base)
```

```{r}
outfile_m5 <- read_csv("CAPITA_OUTFILE_M5.csv")

select <- outfile_m5 %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("Weight"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^IUID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Sim")

select_nd <- unique(select$IUID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(IUWeightSu = weight)

labour_m5 <- labour

remove(outfile_m5)
```

```{r}
outfile_ubi <- read_csv("CAPITA_OUTFILE_UBI.csv")

select <- outfile_ubi %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("Weight"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^IUID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Sim")

select_nd <- unique(select$IUID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(IUWeightSu = weight)

labour_ubi <- labour

write_csv(labour_ubi, "UBI.csv")

remove(outfile_ubi)
```

```{r}
outfile_m1 <- read_csv("CAPITA_OUTFILE_M1.csv")

select <- outfile_m1 %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("Weight"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^IUID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Sim")

select_nd <- unique(select$IUID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(IUWeightSu = weight)


labour_m1 <- labour

remove(outfile_m1)
```

```{r}
outfile_m2 <- read_csv("CAPITA_OUTFILE_M2.csv")

select <- outfile_m2 %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("Weight"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^IUID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Sim")

select_nd <- unique(select$IUID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")
 
colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(IUWeightSu = weight)


labour_m2 <- labour

remove(outfile_m2)
```

```{r}
outfile_m3 <- read_csv("CAPITA_OUTFILE_M3.csv")

select <- outfile_m3 %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("Weight"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^IUID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Sim")

select_nd <- unique(select$IUID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(IUWeightSu = weight)


labour_m3 <- labour

remove(outfile_m3)
```

```{r}
outfile_m4 <- read_csv("CAPITA_OUTFILE_M4.csv")

select <- outfile_m4 %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("Weight"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^IUID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Sim")

select_nd <- unique(select$IUID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "weight", contains("IUID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "weight", contains("IUID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(IUWeightSu = weight)


labour_m4 <- labour

remove(outfile_m4, labour, labour_r, labour_s, labour_1, labour_2, labour_3, labour_4)
```

# RUN ALWAYS - Making combined dataset 'compare' (and compare_csv for debugging)

```{r}
labour_base <- labour_base %>% mutate(Model = "base")
labour_ubi <- labour_ubi %>% mutate(Model = "ubi")
labour_m5 <- labour_m5 %>% mutate(Model = "m5")
labour_m1 <- labour_m1 %>% mutate(Model = "m1")
labour_m2 <- labour_m2 %>% mutate(Model = "m2")
labour_m3 <- labour_m3 %>% mutate(Model = "m3")
labour_m4 <- labour_m4 %>% mutate(Model = "m4")

## To ensure that IU characteristics are consistent across models (changing social security should NOT change HH characteristics)

base_iu <- labour_base %>% select(YaRand, IUID, IncPrivA, ActualAge, PersonWeightS, StudyType, IUTypeS, IUPosS, DepsUnder15, IUWeightSu, LfStat, StudyType)

compare <- rbind(labour_base, labour_ubi, labour_m5, labour_m1, labour_m2, labour_m3, labour_m4)

test <- rbind(labour_base, labour_m5, labour_m1)

test %>% filter(IUTypeS == 3) %>% group_by(Model, IUID) %>% summarise(weight = sum(IUWeightSu), n = n()) %>% summarise(number = sum(weight), n = sum(n))

test %>% filter(IUTypeS == 1) %>% group_by(Model, IUID) %>% summarise(weight = sum(IUWeightSu)) %>% summarise(number = sum(weight))

labour_m5 %>% group_by(Model, IUID) %>% summarise(weight = min(IUWeightSu)/sum(IUWeightSu)*min(IUWeightSu)) %>% summarise(sum(weight))

compare_csv <- compare %>% 
  mutate(Pensioner = ifelse(is.na(PenType) == TRUE,0,1)) %>% 
  group_by(Model, IUID) %>%
  summarise(DispInc = sum(IncDispA), PrivInc = sum(IncPrivA), Weight = mean(IUWeightSu), Pensioners = sum(Pensioner), age = mean(ActualAge))

write_csv(compare_csv, "compare.csv")

```

# RUN ALWAYS - Equivalised incomes

Using the ABS OECD-modified equivalence scale, this is 1 for lone person, and 0.5 for each additional person 15 years or older, 0.3 for each child under 15 years. 

NOTE THAT compare_equiv displays income unit information. As such, it is useful for quick income unit comparisons between models, with no further need for grouping.

```{r}
compare <- compare %>% mutate(equivalence_factor = case_when(
  IUPosS == 1 ~ 1 + DepsUnder15*0.3,
  IUPosS > 1 ~ 0.5
))

compare_equiv <- compare %>% mutate(Pension_age = ifelse(ActualAge > "65",1,0),
                                    BI_total = YaTotA+JspTotA,
                                    Number_earning_income = ifelse(IncPrivA < 10400, 0, 1)) %>%
                            mutate(under_18_BI_total = ifelse(ActualAge < 18, BI_total,0)) %>%
  group_by(Model, IUID) %>%
  summarise(
  DispInc = sum(IncDispA), 
  PrivInc = sum(IncPrivA), 
  Weight = mean(IUWeightSu),
  age = mean(ActualAge),
  Pension_age = sum(Pension_age),
  equivalence_factor = sum(equivalence_factor),
  IUTType = mean(IUTypeS),
  FTB = sum(FtbaA+FtbbA),
  BI_total = sum(BI_total),
  under_18_BI_total = sum(under_18_BI_total),
  Num_earning_income = sum(Number_earning_income),
  famsize = n()+max(DepsUnder15)
  )

compare_equiv <- compare_equiv %>% mutate(equivalised_DispInc = DispInc/equivalence_factor)

compare_equiv <- compare_equiv %>%           # HERE I AM CREATING NEW EASIER TO UNDERSTAND INCOME UNITS
 mutate(IU_earners = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 & Num_earning_income < 1 ~ "wCouple without a significant income earner",
    IUTType == 2 & Num_earning_income < 2 ~ "Single income couple without dependent children",
    IUTType == 2 ~ "Dual income couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>%
   mutate(IU_big = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 ~ "Couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>% mutate(famweight = Weight*famsize)

base_equiv <- compare_equiv %>% filter(Model == "base")
ubi_equiv <- compare_equiv %>% filter(Model == "ubi")
m5_equiv <- compare_equiv %>% filter(Model == "m5")
m1_equiv <- compare_equiv %>% filter(Model == "m1")
m2_equiv <- compare_equiv %>% filter(Model == "m2")
m3_equiv <- compare_equiv %>% filter(Model == "m3")
m4_equiv <- compare_equiv %>% filter(Model == "m4")

equiv_gini_base <- gini(base_equiv$equivalised_DispInc, base_equiv$famweight)
equiv_gini_m5 <- gini(m5_equiv$equivalised_DispInc, m5_equiv$famweight)
equiv_gini_ubi <- gini(ubi_equiv$equivalised_DispInc, ubi_equiv$famweight)
equiv_gini_m1 <- gini(m1_equiv$equivalised_DispInc, m1_equiv$famweight)
equiv_gini_m2 <- gini(m2_equiv$equivalised_DispInc, m2_equiv$famweight)
equiv_gini_m3 <- gini(m3_equiv$equivalised_DispInc, m3_equiv$famweight)
equiv_gini_m4 <- gini(m4_equiv$equivalised_DispInc, m4_equiv$famweight)


```

Quintile comparison algorithm for equivalent disposable income (using reldist)

```{r}

#Generating quantiles of equivalised disposable income from the dataset.

base_quantiles <- data.frame(Quantile = c(0.2,0.4,0.6,0.8,1.0),EquivalDispInc = 1:5)

for(i in 1:5){
  quant <- i/5
  base_quantiles[i,2] <- wtd.quantile(base_equiv$equivalised_DispInc, q = quant, weight = base_equiv$Weight)
}

base_quant <- base_equiv %>% ungroup() %>%
  mutate(Quintile = case_when(
    equivalised_DispInc <= base_quantiles[1,2] ~ 1,
    equivalised_DispInc <= base_quantiles[2,2] ~ 2,
    equivalised_DispInc <= base_quantiles[3,2] ~ 3,
    equivalised_DispInc <= base_quantiles[4,2] ~ 4,
    TRUE ~ 5
  )) %>%           # HERE I AM CREATING NEW EASIER TO UNDERSTAND INCOME UNITS
  mutate(IU = case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Working age couple with dependent children",
    IUTType == 2 ~ "Working age couple without dependent children",
    IUTType == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  )) %>% 
  select(IUID, Quintile, IU) #, Weight

compare_equiv %>% left_join(base_quant, by = "IUID") %>%
  select(everything(), -IUTType) %>%       # AND THEN DROPPING IUTTYPE
  group_by(Model, Quintile) %>%
  summarise(
  mean_equivalised_DispInc = weighted.mean(equivalised_DispInc, w = Weight),
  mean_ftb_and_under_18 = weighted.mean(FTB, w = Weight)+weighted.mean(under_18_BI_total, w = Weight), 
  median_equivalised_DispInc = weighted.median(equivalised_DispInc, w = Weight),
  num_income_units = sum(Weight)) %>%
  ungroup() %>%
  group_by(Model) %>% mutate(Proportion_within_quintile = num_income_units / sum(num_income_units))

equiv_summary <- compare_equiv %>% ungroup() %>% 
 #   select(everything(), -Weight) %>%  
  left_join(base_quant, by = "IUID") %>%
  select(everything(), -IUTType) %>%       # AND THEN DROPPING IUTTYPE
  group_by(Model, IU, Quintile) %>%
  summarise(
  mean_equivalised_DispInc = weighted.mean(equivalised_DispInc, w = Weight),
  mean_ftb_and_under_18 = weighted.mean(FTB, w = Weight)+weighted.mean(under_18_BI_total, w = Weight), 
  median_equivalised_DispInc = weighted.median(equivalised_DispInc, w = Weight),
  num_income_units = sum(Weight)) %>%
  ungroup() %>%
  group_by(Model, IU) %>% mutate(Proportion_within_quintile = num_income_units / sum(num_income_units))

equiv_list <- equiv_summary %>% gather("Key", "Value", -Model, -IU, -Quintile, -num_income_units, -Proportion_within_quintile) %>%
  mutate(Model_Key = paste0(Key, "_", Model)) %>% ungroup() %>% select(everything(), -Key, -Model) %>%
  spread(Model_Key, Value) %>% arrange(IU, Quintile)

wb <- loadWorkbook("Outputs/Equivalised income changes.xlsx")
writeData(wb, sheet = "Data", equiv_list)
saveWorkbook(wb, "Outputs/Equivalised income changes.xlsx", overwrite = T)
```


Quintile comparison algorithm for general disposable income (NOTE: still uses compare_equiv dataset)

```{r}

#Generating quantiles of disposable income from the dataset.

base_quantiles <- data.frame(Quantile = c(0.2,0.4,0.6,0.8,1.0),HHDispInc = 1:5)

for(i in 1:5){
  quant <- i/5
  base_quantiles[i,2] <- wtd.quantile(base_equiv$DispInc, q= quant, weight = base_equiv$Weight)
}

base_quant <- base_equiv %>% ungroup() %>%
  mutate(Quintile = case_when(
    DispInc < base_quantiles[1,2] ~ 1,
    DispInc < base_quantiles[2,2] ~ 2,
    DispInc < base_quantiles[3,2] ~ 3,
    DispInc < base_quantiles[4,2] ~ 4,
    TRUE ~ 5,
  )) %>%           # HERE I AM CREATING NEW EASIER TO UNDERSTAND INCOME UNITS
  mutate(IU = case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Working age couple with dependent children",
    IUTType == 2 ~ "Working age couple without dependent children",
    IUTType == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  )) %>% 
  select(IUID, Quintile, IU)

inc_summary <- compare_equiv %>% ungroup() %>% 
  left_join(base_quant, by = "IUID") %>%
  select(everything(), -IUTType) %>%       # AND THEN DROPPING IUTTYPE
  group_by(Model, IU, Quintile) %>%
  summarise(
  mean_DispInc = weighted.mean(DispInc, w = Weight),
  median_DispInc = weighted.median(DispInc, w = Weight),
  num_income_units = sum(Weight)) %>% ungroup() %>%
  group_by(Model, IU) %>% mutate(Proportion_within_quintile = num_income_units / sum(num_income_units))

inc_list <- inc_summary %>% gather("Key", "Value", -Model, -IU, -Quintile, -num_income_units, -Proportion_within_quintile) %>%
  mutate(Model_Key = paste0(Key, "_", Model)) %>% ungroup() %>% select(everything(), -Key, -Model) %>%
  spread(Model_Key, Value) %>% arrange(IU, Quintile)

wb <- loadWorkbook("Outputs/Disposable income changes.xlsx")
writeData(wb, sheet = "Data", inc_list)
saveWorkbook(wb, "Outputs/Disposable income changes.xlsx", overwrite = T)

gini_base <- gini(base_equiv$DispInc, base_equiv$Weight)
gini_m5 <- gini(m4_equiv$DispInc, m4_equiv$Weight)
gini_ubi <- gini(ubi_equiv$DispInc, ubi_equiv$Weight)
```

Quintile comparison but with quintiles for each family type.

```{r}

#Generating quantiles of disposable income from the dataset.

base_quantiles <- data.frame(Quantile = c(0.2,0.4,0.6,0.8,1.0),HHDispInc = 1:5)

base_quant <- base_equiv %>% ungroup() %>%           # Assigning numbers to each unit for easy quantile.
  mutate(IU = case_when(
    Pension_age == 1 & IUTType >= 3 ~ 1,
    Pension_age >= 1 ~ 2,
    IUTType == 1 ~ 3,
    IUTType == 2 ~ 4,
    IUTType == 3 ~ 5,
    TRUE ~ 6
  )) 

for(j in 1:6){
  base_quant_specific <- base_quant %>% filter(IU == j)
  for(i in 1:5){
    quant <- i/5
    base_quantiles[i,2] <- wtd.quantile(base_quant_specific$DispInc, q= quant, weight = base_quant_specific$Weight)
  }
  
}

base_quant <- base_quant %>%
  mutate(Quintile = case_when(
    DispInc < base_quantiles[1,2] ~ 1,
    DispInc < base_quantiles[2,2] ~ 2,
    DispInc < base_quantiles[3,2] ~ 3,
    DispInc < base_quantiles[4,2] ~ 4,
    TRUE ~ 5,
  )) %>%          # Putting the easy to understand income units back in...
  mutate(IU = case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Working age couple with dependent children",
    IUTType == 2 ~ "Working age couple without dependent children",
    IUTType == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  )) %>%
  select(IUID, Quintile, IU)

inc_summary <- compare_equiv %>% ungroup() %>%
  left_join(base_quant, by = "IUID") %>%
  select(everything(), -IUTType) %>%       # AND THEN DROPPING IUTTYPE
  group_by(Model, IU, Quintile) %>%
  summarise(
  mean_DispInc = weighted.mean(DispInc, w = Weight),
  median_DispInc = weighted.median(DispInc, w = Weight),
  num_income_units = sum(Weight)) %>% ungroup() %>%
  group_by(Model, IU) %>% mutate(Proportion_within_quintile = num_income_units / sum(num_income_units))

inc_list <- inc_summary %>% gather("Key", "Value", -Model, -IU, -Quintile, -num_income_units, -Proportion_within_quintile) %>%
  mutate(Model_Key = paste0(Key, "_", Model)) %>% ungroup() %>% select(everything(), -Key, -Model) %>%
  spread(Model_Key, Value) %>% arrange(IU, Quintile)

wb <- loadWorkbook("Outputs/Alt Disposable income changes.xlsx")
writeData(wb, sheet = "Data", inc_list)
saveWorkbook(wb, "Outputs/Alt Disposable income changes.xlsx", overwrite = T)

```


CHEAT METHOD

```{r}

#Generating quantiles of disposable income from the dataset.
base_quantiles <- data.frame(Quantile = c(0.2,0.4,0.6,0.8,1.0),HHDispInc = 1:5)

lazy <- data.frame(IU = 0, Quintile = 0, num_income_units = 0, Proportion_within_quintile = 0, mean_DispInc_base = 0, mean_DispInc_m5 = 0, mean_DispInc_m1 = 0, mean_DispInc_m2 = 0, mean_DispInc_m3 = 0, mean_DispInc_m4 = 0, mean_DispInc_ubi = 0, median_DispInc_base = 0, median_DispInc_m5 = 0, median_DispInc_m1 = 0, median_DispInc_m2 = 0, median_DispInc_m3 = 0, median_DispInc_m4 = 0, median_DispInc_ubi = 0)

base_quant <- base_equiv %>% ungroup() %>%           # Assigning numbers to each unit for easy quantile.
  mutate(IU = case_when(
    Pension_age == 1 & IUTType >= 3 ~ 1,
    Pension_age >= 1 ~ 2,
    IUTType == 1 ~ 3,
    IUTType == 2 ~ 4,
    IUTType == 3 ~ 5,
    TRUE ~ 6
  )) 


for(j in 1:6){
  base_equiv_specific <- base_quant %>% filter(IU == j)

#Generating quantiles of disposable income from the dataset.

for(i in 1:5){
  quant <- i/5
  base_quantiles[i,2] <- wtd.quantile(base_equiv_specific$DispInc, q= quant, weight = base_equiv_specific$Weight)
}

base_equiv_specific <- base_equiv_specific %>% ungroup() %>%
  mutate(Quintile = case_when(
    DispInc < base_quantiles[1,2] ~ 1,
    DispInc < base_quantiles[2,2] ~ 2,
    DispInc < base_quantiles[3,2] ~ 3,
    DispInc < base_quantiles[4,2] ~ 4,
    TRUE ~ 5,
  )) %>% 
  select(IUID, Quintile, IU)

inc_summary <- compare_equiv %>%
  ungroup() %>%
  left_join(base_equiv_specific, by = "IUID") %>%
  filter(IU == j)  %>%        # MAGIC HAPPENS HERE
 # HERE I AM CREATING NEW EASIER TO UNDERSTAND INCOME UNITS
  mutate(IU = case_when(
    IU == 1 ~ "Single above pension age",
    IU == 2 ~ "Couple above pension age",
    IU == 3 ~ "Working age couple with dependent children",
    IU == 4 ~ "Working age couple without dependent children",
    IU == 5 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  )) %>%
  select(everything(), -IUTType) %>%       # AND THEN DROPPING IUTTYPE
  group_by(Model, IU, Quintile) %>%
  summarise(
  mean_DispInc = weighted.mean(DispInc, w = Weight),
  median_DispInc = weighted.median(DispInc, w = Weight), 
  num_income_units = sum(Weight)) %>% ungroup() %>%
  group_by(Model, IU) %>% mutate(Proportion_within_quintile = num_income_units / sum(num_income_units))

inc_list <- inc_summary %>% gather("Key", "Value", -Model, -IU, -Quintile, -num_income_units, -Proportion_within_quintile) %>%
  mutate(Model_Key = paste0(Key, "_", Model)) %>% ungroup() %>% select(everything(), -Key, -Model) %>%
  spread(Model_Key, Value) %>% arrange(IU, Quintile)

lazy <- lazy %>% rbind(inc_list)
}

lazy <- lazy %>% filter(IU != 0)

wb <- loadWorkbook("Outputs/Alt Disposable income changes.xlsx")
writeData(wb, sheet = "Data", lazy)
saveWorkbook(wb, "Outputs/Alt Disposable income changes.xlsx", overwrite = T)

```

# 4 - Using 'compare' for Henderson poverty lines

```{r}

poverty_set <- compare %>% ungroup() %>%
  mutate(head_in_wf = ifelse(IUPosS == 1 & LfStat %in% c("FT","PT"),1,0), Pension_age = ifelse(ActualAge > "65",1,0)) %>%
  group_by(Model, IUID) %>%
  summarise(
    IncDisp = sum(IncDispA),
    head_in_wf = sum(head_in_wf),
    IUTypeS = mean(IUTypeS),
    deps_under_15 = mean(DepsUnder15),
    persons_over_15 = n(),
    Weight = mean(IUWeightSu),
    Pension_age = sum(Pension_age)
  ) %>% ungroup()

poverty_set <- poverty_set %>% mutate(total_persons = persons_over_15+deps_under_15) %>%
  mutate(hwf_poverty_line = case_when(
                                      IUTypeS == 2 ~ 726.27,
                                      IUTypeS == 1 & total_persons == 3 ~ 873.01,
                                      IUTypeS == 1 & total_persons == 4 ~ 1019.75,
                                      IUTypeS == 1 & total_persons == 5 ~ 1166.49,
                                      IUTypeS == 1 & total_persons > 5 ~ 1313.24,
                                      IUTypeS == 4 ~ 542.92,
                                      IUTypeS == 3 & total_persons == 2 ~ 697,
                                      IUTypeS == 3 & total_persons == 3 ~ 843.64,
                                      IUTypeS == 3 & total_persons == 4 ~ 990.38,
                                      IUTypeS == 3 & total_persons > 4 ~ 1137.13,
                                      TRUE ~ 1137.13
                                      )) %>%
  mutate(hnwf_poverty_line = case_when(
                                      IUTypeS == 2 ~ 623.58,
                                      IUTypeS == 1 & total_persons == 3 ~ 770.32,
                                      IUTypeS == 1 & total_persons == 4 ~ 917.06,
                                      IUTypeS == 1 & total_persons == 5 ~ 1063.81,
                                      IUTypeS == 1 & total_persons > 5 ~ 1210.55,
                                      IUTypeS == 4 ~ 440.23,
                                      IUTypeS == 3 & total_persons == 2 ~ 594.21,
                                      IUTypeS == 3 & total_persons == 3 ~ 740.95,
                                      IUTypeS == 3 & total_persons == 4 ~ 887.69,
                                      IUTypeS == 3 & total_persons > 4 ~ 1034.44,
                                      TRUE ~ 1034.44   
  )) %>% 
  mutate(poverty_line = ifelse(head_in_wf == 1, hwf_poverty_line, hnwf_poverty_line)) %>%
  select(everything(), -hwf_poverty_line, -hnwf_poverty_line) %>%
  mutate(poverty_indicator = ifelse(IncDisp/52 < poverty_line, 1, 0)) %>%
  
    mutate(IU = case_when(
    Pension_age == 1 & IUTypeS >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTypeS == 1 ~ "Working age couple with dependent children",
    IUTypeS == 2 ~ "Working age couple without dependent children",
    IUTypeS == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  ))

output_pov_hend <- poverty_set %>% count(Model, IU, poverty_indicator, wt = Weight) %>% group_by(Model, IU) %>% mutate(Prop_in_poverty = n/sum(n)) %>% filter(poverty_indicator == 1) %>%
  select(everything(), -poverty_indicator)

wb <- loadWorkbook("Outputs/Henderson poverty.xlsx")
writeData(wb, sheet = "Data", output_pov_hend)
saveWorkbook(wb, "Outputs/Henderson poverty.xlsx", overwrite = T)

# 1 = Couple with dependent children, 2 = Couple without dependent children, 3 = Lone parent with dependent children, 4 = Lone person.

```

Will want to also do basic poverty line (50% of median income).

```{r}

poverty_set <- compare %>% ungroup() %>%
  mutate(head_in_wf = ifelse(IUPosS == 1 & LfStat != "NILF",1,0), Pension_age = ifelse(ActualAge > "65",1,0)) %>%
  group_by(Model, IUID) %>%
  summarise(
    IncDisp = sum(IncDispA),
    head_in_wf = sum(head_in_wf),
    IUTypeS = mean(IUTypeS),
    deps_under_15 = mean(DepsUnder15),
    persons_over_15 = n(),
    Weight = mean(IUWeightSu),
    Pension_age = sum(Pension_age)
  ) %>% ungroup()

poverty_median_m5 <- poverty_set %>% filter(Model == "m5")
poverty_median_ubi <- poverty_set %>% filter(Model == "ubi")
poverty_median_base <- poverty_set %>% filter(Model == "base")

poverty_median_m5 <- weighted.median(poverty_median_m5$IncDisp, w = poverty_median_m5$Weight)/2
poverty_median_ubi <- weighted.median(poverty_median_ubi$IncDisp, w = poverty_median_ubi$Weight)/2
poverty_median_base <- weighted.median(poverty_median_base$IncDisp, w = poverty_median_base$Weight)/2

poverty_set <- poverty_set %>% mutate(total_persons = persons_over_15+deps_under_15) %>%
  mutate(poverty_line = case_when(
                                      Model == "m5" ~ poverty_median_m5,
                                      Model == "ubi" ~ poverty_median_ubi,
                                      Model == "base" ~ poverty_median_base,
                                      TRUE ~ poverty_median_base
                                      )) %>%
  mutate(poverty_indicator = ifelse(IncDisp < poverty_line, 1, 0)) %>%
  
    mutate(IU = case_when(
    Pension_age == 1 & IUTypeS >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTypeS == 1 ~ "Working age couple with dependent children",
    IUTypeS == 2 ~ "Working age couple without dependent children",
    IUTypeS == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  ))

output_pov_median <- poverty_set %>% count(Model, IU, poverty_indicator, wt = Weight) %>% group_by(Model, IU) %>% mutate(Prop_in_poverty = n/sum(n)) %>% filter(poverty_indicator == 1)


output_pov_median

# 1 = Couple with dependent children, 2 = Couple without dependent children, 3 = Lone parent with dependent children, 4 = Lone person.


```

Median income calcs done here with compare_equiv (which has already been summarised by IU)

```{r}
poverty_set <- compare_equiv %>% ungroup()

poverty_median_m5 <- poverty_set %>% filter(Model == "m5")
poverty_median_ubi <- poverty_set %>% filter(Model == "ubi")
poverty_median_base <- poverty_set %>% filter(Model == "base")
poverty_median_m1 <- poverty_set %>% filter(Model == "m1")
poverty_median_m2 <- poverty_set %>% filter(Model == "m2")
poverty_median_m3 <- poverty_set %>% filter(Model == "m3")
poverty_median_m4 <- poverty_set %>% filter(Model == "m4")

data.frame(
  Model = c("m5","ubi","base","m1","m2","m3","m4"),
  median_DispInc = c(weighted.median(poverty_median_m5$DispInc,w = poverty_median_m5$Weight),   
                              weighted.median(poverty_median_ubi$DispInc, w = poverty_median_ubi$Weight),
                              weighted.median(poverty_median_base$DispInc, w = poverty_median_base$Weight),
                     weighted.median(poverty_median_m1$DispInc, w = poverty_median_m1$Weight),
                     weighted.median(poverty_median_m2$DispInc, w = poverty_median_m2$Weight),
                     weighted.median(poverty_median_m3$DispInc, w = poverty_median_m3$Weight),
                     weighted.median(poverty_median_m4$DispInc, w = poverty_median_m4$Weight)
                     ),
           median_equivalised_DispInc = c(
             weighted.median(poverty_median_m5$equivalised_DispInc,w = poverty_median_m5$Weight),
             weighted.median(poverty_median_ubi$equivalised_DispInc, w = poverty_median_ubi$Weight),
             weighted.median(poverty_median_base$equivalised_DispInc, w = poverty_median_base$Weight),
             weighted.median(poverty_median_m1$equivalised_DispInc, w = poverty_median_m1$Weight),
                     weighted.median(poverty_median_m2$equivalised_DispInc, w = poverty_median_m2$Weight),
                     weighted.median(poverty_median_m3$equivalised_DispInc, w = poverty_median_m3$Weight),
                     weighted.median(poverty_median_m4$equivalised_DispInc, w = poverty_median_m4$Weight)
           )
)





```

# Geom_density plots for different household types!

```{r}
compare %>% 
  mutate(Pensioner = ifelse(is.na(PenType) == TRUE,0,1)) %>% mutate(Model = fct_relevel(Model,"base","m1","m2","m3","m4","m5","ubi")) %>%
  group_by(Model, IUID) %>%
  summarise(DispInc = sum(IncDispA), PrivInc = sum(IncPrivA), Weight = mean(IUWeightSu), Pensioners = sum(Pensioner), IUTypeS = mean(IUTypeS)) %>%
  filter(Pensioners == 0) %>% mutate(IU = case_when(
    IUTypeS == 1 ~ "Working age couple with dependent children",
    IUTypeS == 2 ~ "Working age couple without dependent children",
    IUTypeS == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Working age lone person"
  )) %>%
  ggplot(aes(x = DispInc, weight = Weight, fill = IU)) + geom_density(alpha = .3) + xlim(0,300000)+facet_wrap(~Model, ncol = 1)

```


Graph: Equivalised income distribution for income units, by policy model
```{r}
compare_equiv  %>% mutate(Model = factor(Model, levels = c("base","m1","m2","m3","m4","m5","ubi"))) %>% 
  mutate(IU = case_when(
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 ~ "Couple without dependent children",
    IUTType == 3 ~ "Single parent with dependent children",
    TRUE ~ "Single person"
  )) %>% filter(Model %in% c("base","ubi","m4")) %>%
  ggplot(aes(x = equivalised_DispInc, weight = Weight, fill = Model)) + geom_density(alpha = .3) + xlim(0,100000)+facet_wrap(~IU, ncol = 1)+
  theme_minimal()+
  labs(x = "Equivalised disposable income ($)", y = "Density", fill = "Model")+
  scale_fill_discrete(labels = c("Existing policy", "GMI", "UBI (Model 6)"))+ 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.y = element_blank())+
  theme(strip.text.x = element_text(size=12, face="bold"), legend.position = "top", legend.text = element_text(size = 11), legend.title = element_blank())
ggsave("BI_equivalised_income_distributions.png", width = 7, height = 8, , dpi = 1200)
  

```


# Bits for the eligibility criteria section

Looking at '% eligible':

```{r}
eligibility_set <- compare %>% 
  filter(Model == "base", ActualAge >= 18)  %>% 
  mutate(
    Pension_age = ifelse(ActualAge > "65",PersonWeightS,0),
    Pensions = ifelse(is.na(PenType) == TRUE, 0, PersonWeightS),
    LF_elig = case_when(LfStat %in% c("FT", "PT") ~ PersonWeightS, NsaSW > 0 ~ PersonWeightS, TRUE ~ 0)) %>%
  mutate(
    Parent_eligible = case_when(DepsUnder15 > 0 ~ PersonWeightS, ParPaySW > 0 ~ PersonWeightS, TRUE ~ 0),
    Student_eligible = case_when(StudyType %in% c("SS","FTNS") ~ PersonWeightS, TRUE ~ 0),
    Pension_eligible = case_when(Pensions > 0 ~ PersonWeightS, TRUE ~ 0)
    ) %>%
  mutate(M1 = LF_elig, M2 = pmax(LF_elig, Student_eligible), M3 = pmax(Pension_eligible, LF_elig, Student_eligible, Parent_eligible)) %>%
  mutate(IU = as.factor(case_when(
    IUTypeS == 1 ~ "couple with dependent children",
    IUTypeS == 2 ~ "couple without dependent children",
    IUTypeS == 3 ~ "lone parent with dependent children",
    TRUE ~ "Lone person"
    ))) %>% group_by(IU) %>%
  summarise(
    Persons = sum(PersonWeightS),
    LF = sum(M1)/sum(PersonWeightS),
    Students = sum(M2)/sum(PersonWeightS),
    Parents = sum(M3)/sum(PersonWeightS)
  )
  compare %>% filter(Model == "ubi")
#And alternatively:

receiving_bi_set <- compare %>% mutate(receiving_bi = case_when(
  !is.na(PenType) & PenType != "PPS" ~ 1,
  (YaTotA > 0 | JspTotA > 0) & AllowType %in% c("NSA", "YASTUD") ~ 1,
  TRUE ~ 0
))
  
lazy <- receiving_bi_set %>% filter(ActualAge >= 18 & ActualAge < 65)  %>% 
  mutate(IU = case_when(
    IUTypeS == 1 ~ "Working age couple with dependent children",
    IUTypeS == 2 ~ "Working age couple without dependent children",
    IUTypeS == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  )) %>% 
   group_by(Model, IU) %>%
  summarise(
  Population = sum(PersonWeightS), 
  Proportion_receiving = sum(receiving_bi*PersonWeightS)/sum(PersonWeightS)
  )

wb <- loadWorkbook("Outputs/Proportions receiving BI.xlsx")
writeData(wb, sheet = "Data", lazy)
writeData(wb, sheet = "Data2", eligibility_set)
saveWorkbook(wb, "Outputs/Proportions receiving BI.xlsx", overwrite = T)

receiving_bi_set %>% filter(ActualAge >= 18 & ActualAge < 65)  %>% 
  mutate(IU = case_when(
    IUTypeS == 1 ~ "Couple with dependent children",
    IUTypeS == 2 ~ "Couple without dependent children",
    IUTypeS == 3 ~ "Single parent with dependent children",
    TRUE ~ "Single person"
  )) %>% filter(Model %in% c("m1","m3","m5"))  %>% mutate(IU = as_factor(IU)) %>%
   group_by(Model, IU, ActualAge) %>%
  summarise(
  Population = sum(PersonWeightS), 
  Population_receiving = sum(receiving_bi*PersonWeightS),
  Proportion_receiving = sum(receiving_bi*PersonWeightS)/sum(PersonWeightS)
  ) %>%
  ggplot(aes(ActualAge, Population_receiving, color = Model))+geom_line()+facet_wrap(~IU, ncol = 1)+
  theme_minimal()+
  labs(x = "Age", y = "Number of persons", color = "Model")+
  scale_color_discrete(labels = c("Model 5", "Model 1", "Model 3"))+ 
  theme(panel.grid.minor = element_blank(), legend.text = element_text(size = 11))+
  theme(strip.text.x = element_text(size=12, face="bold"))
ggsave("BI_recipients_by_age.png", width = 7, height = 8)

### AND AN INVESTIGATION INTO SINGLE-INCOME VS COUPLE

couple_type_receiving <- left_join(receiving_bi_set, compare_equiv, by = "IUID") %>% filter(IUTType == 2, ActualAge %in% c(50:64)) %>% group_by(Model.x, Num_earning_income) %>%
  summarise(
    Population = sum(PersonWeightS), 
  Population_receiving = sum(receiving_bi*PersonWeightS),
  Proportion_receiving = sum(receiving_bi*PersonWeightS)/sum(PersonWeightS)
  ) %>% filter(Model.x %in% c("m1", "m3", "m5")) %>%
count(Num_earning_income, Population_receiving)

```

Making tables for BI amounts going to different ages within HH types, in different models

```{r}
sum_bi_set <- compare %>% mutate(bi_total = YaTotA+JspTotA,
                                 Age_group = case_when(
                                   ActualAge < 18  ~ "Under 18",
                                   ActualAge %in% c(18:24) ~ "18-24",
                                   ActualAge %in% c(25:34) ~ "25-34",
                                   ActualAge %in% c(35:44) ~ "35-44",
                                   ActualAge %in% c(45:54) ~ "45-54",
                                   ActualAge %in% c(55:64) ~ "55-64",
                                   TRUE ~ "65 and over"
                                 )) %>% 
  mutate(IU = case_when(
    IUTypeS == 1 ~ "Working age couple with dependent children",
    IUTypeS == 2 ~ "Working age couple without dependent children",
    IUTypeS == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  )) %>%
  group_by(Model, IU, Age_group) %>% summarise(bi_total = sum(bi_total * PersonWeightS), num_people = sum(PersonWeightS))

sum_ftb <- compare %>% mutate(ftb_total = FtbaA+FtbbA,
                              Age_group = case_when(
                                   ActualAge < 18  ~ "Under 18",
                                   ActualAge %in% c(18:24) ~ "18-24",
                                   ActualAge %in% c(25:34) ~ "25-34",
                                   ActualAge %in% c(35:44) ~ "35-44",
                                   ActualAge %in% c(45:54) ~ "45-54",
                                   ActualAge %in% c(55:64) ~ "55-64",
                                   TRUE ~ "65 and over"
                                 )) %>%
    mutate(IU = case_when(
    IUTypeS == 1 ~ "Working age couple with dependent children",
    IUTypeS == 2 ~ "Working age couple without dependent children",
    IUTypeS == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  )) %>%
  group_by(Model, IU) %>% summarise(ftb_total = sum(ftb_total * PersonWeightS))

wb <- loadWorkbook("Outputs/Proportions receiving BI.xlsx")
writeData(wb, sheet = "Data3", sum_bi_set)
writeData(wb, sheet = "Data4", sum_ftb)
saveWorkbook(wb, "Outputs/Proportions receiving BI.xlsx", overwrite = T)


```


Graphs for analysis:

```{r}
compare_equiv  %>% mutate(Model = factor(Model, levels = c("m5","m3","base","m1","m2","m4","ubi"))) %>% 
  filter(Model %in% c("base","m3","m5")) %>% filter(Pension_age == 0) %>%
  mutate(IU = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Working age couple with dependent children",
    IUTType == 2 ~ "Working age couple without dependent children",
    IUTType == 3 ~ "Working age lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>%
  ggplot(aes(x = equivalised_DispInc, weight = Weight, fill = Model)) + geom_density(alpha = .3) + xlim(0,100000)+facet_wrap(~IU, ncol = 1)

```

# Looking at single income vs dual income couples with no children

```{r}
compare_expanded <- compare_equiv  %>% mutate(Model = factor(Model, levels = c("m3","base","m5","m4","m1","m2","ubi"))) %>% 
  mutate(Model = fct_relevel(Model, "m4", after = 1)) %>%
  filter(Model %in% c("base","ubi","m5", "m4")) %>% 
  mutate(Model = fct_relevel(Model,"base", after = 0)) %>%
  mutate(IU = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 & Num_earning_income < 2 ~ "Single income couple without dependent children",
    IUTType == 2 ~ "Dual income couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>% mutate(IU = fct_relevel(IU,"Dual income couple without dependent children", after = 7)) %>%
   mutate(IU_big = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 ~ "Couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  )))

compare_expanded %>% filter(IU_big %in% c("Couple with dependent children", "Couple without dependent children")) %>% 
  ggplot(aes(x = IU_big, y = equivalised_DispInc, weight = Weight, fill = Model)) + geom_boxplot()+
  theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank())+
  coord_cartesian(ylim = c(0,150000))+
  scale_y_continuous(labels = dollar_format(), )+
  scale_x_discrete(labels = c("Couple with children", "Couple without children", "Single parent", "Single person"))+
  labs(title = "Equivalised disposable income distribution for working-age IUs", x = "IU Type", y = "Equivalised disposable income", fill = "Model")
ggsave("boxplots.jpg", width = 8, height = 4.5)

quartiles <- compare_expanded %>% group_by(Model, IU) %>% summarise(
  `0.25` = weighted.quantile(equivalised_DispInc, w = Weight, probs = seq(0.25,0.25,0)),
  `0.5` = weighted.quantile(equivalised_DispInc, w = Weight, probs = seq(0.5,0.5,0)), 
  `0.75` = weighted.quantile(equivalised_DispInc, w = Weight, probs = seq(0.75,0.75,0)), 
  )

compare_equiv %>% mutate(under_18 = ifelse(age<18, "Y", "N")) %>% filter(Model == "base") %>% group_by(IUTType, under_18) %>% summarise(num = sum(Weight)) %>% summarise(prop = num/sum(num))
```


Making quantiles for equivalised disposable income on the more detailed level

```{r}
base_quantiles <- data.frame(Quantile = c(0.2,0.4,0.6,0.8,1.0),EquivalDispInc = 1:5)

for(i in 1:5){
  quant <- i/5
  base_quantiles[i,2] <- wtd.quantile(base_equiv$equivalised_DispInc, q = quant, weight = base_equiv$Weight)
}

base_quant <- base_equiv %>% ungroup() %>%
  mutate(Quintile = case_when(
    equivalised_DispInc < base_quantiles[1,2] ~ 1,
    equivalised_DispInc < base_quantiles[2,2] ~ 2,
    equivalised_DispInc < base_quantiles[3,2] ~ 3,
    equivalised_DispInc < base_quantiles[4,2] ~ 4,
    TRUE ~ 5
  )) %>%           # HERE I AM CREATING NEW EASIER TO UNDERSTAND INCOME UNITS
 mutate(IU = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 & Num_earning_income < 1 ~ "No income couple with dependent children",
    IUTType == 1 & Num_earning_income < 2 ~ "Single income couple with dependent children",
    IUTType == 1 ~ "Dual income couple with dependent children",
    IUTType == 2 & Num_earning_income < 1 ~ "No income couple without dependent children",
    IUTType == 2 & Num_earning_income < 2 ~ "Single income couple without dependent children",
    IUTType == 2 ~ "Dual income couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>% 
  select(IUID, Quintile, IU) #, Weight

equiv_summary <- compare_equiv %>% ungroup() %>% 
 #   select(everything(), -Weight) %>%  
  left_join(base_quant, by = "IUID") %>%
  select(everything(), -IUTType) %>%       # AND THEN DROPPING IUTTYPE
  group_by(Model, IU, Quintile) %>%
  summarise(
  mean_equivalised_DispInc = weighted.mean(equivalised_DispInc, w = Weight),
  mean_ftb_and_under_18 = weighted.mean(FTB, w = Weight)+weighted.mean(under_18_BI_total, w = Weight), 
  median_equivalised_DispInc = weighted.median(equivalised_DispInc, w = Weight),
  num_income_units = sum(Weight)) %>%
  ungroup() %>%
  group_by(Model, IU) %>% mutate(Proportion_within_quintile = num_income_units / sum(num_income_units))

equiv_list <- equiv_summary %>% gather("Key", "Value", -Model, -IU, -Quintile, -num_income_units, -Proportion_within_quintile) %>%
  mutate(Model_Key = paste0(Key, "_", Model)) %>% ungroup() %>% select(everything(), -Key, -Model) %>%
  spread(Model_Key, Value) %>% arrange(IU, Quintile)

wb <- loadWorkbook("Outputs/Equivalised income changes.xlsx")
writeData(wb, sheet = "Data2", equiv_list)
saveWorkbook(wb, "Outputs/Equivalised income changes.xlsx", overwrite = T)

compare_equiv %>% left_join(base_quant, by = "IUID") %>%
filter(str_detect(IU,"couple without dependent children"), Model %in% c("base", "m5", "m4", "ubi")) %>% 
  mutate(IU = fct_relevel(IU, "Dual income couple without dependent children", after = 10)) %>% mutate(Model = factor(Model, levels = c("m3","base","m5","m4","m1","m2","ubi"))) %>% 
  mutate(Model = fct_relevel(Model, "m4", after = 1)) %>% mutate(Model = fct_relevel(Model, "base", after = 0)) %>%
  ggplot(aes(x = IU, y = equivalised_DispInc, weight = Weight, fill = Model)) + geom_boxplot()+
  theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank())+
  coord_cartesian(ylim = c(0,150000))+
  scale_y_continuous(labels = dollar_format(), )+
  scale_x_discrete(labels = c("No significant income earners", "Single income", "Dual income"))+
  labs(x = "IU Type", y = "Equivalised disposable income", fill = "Model")+
  theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_blank(), axis.text.x = element_text(size = 12))
ggsave("couple_no_deps_boxplots.jpg", width = 8, height = 4.5, dpi = 1200)

compare_equiv %>% left_join(base_quant, by = "IUID") %>%
filter(str_detect(IU,"couple with dependent children"), Model %in% c("base", "m4", "m5", "ubi")) %>% 
  mutate(IU = fct_relevel(IU, "Dual income couple with dependent children", after = 10)) %>% mutate(Model = factor(Model, levels = c("m3","base","m5","m4","m1","m2","ubi"))) %>% 
  mutate(Model = fct_relevel(Model, "m4", after = 1)) %>% mutate(Model = fct_relevel(Model, "base", after = 0)) %>%
  ggplot(aes(x = IU, y = equivalised_DispInc, weight = Weight, fill = Model)) + geom_boxplot()+
  theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank())+
  coord_cartesian(ylim = c(0,150000))+
  scale_y_continuous(labels = dollar_format(), )+
  scale_x_discrete(labels = c("No significant income earners", "Single income", "Dual income"))+
  labs(x = "IU Type", y = "Equivalised disposable income", fill = "Model")
  theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_blank(), axis.text.x = element_text(size = 12))
ggsave("couple_deps_boxplots.jpg", width = 8, height = 4.5, dpi = 1200)

compare_equiv %>% left_join(base_quant, by = "IUID") %>%
filter(str_detect(IU,"Lone"), Model %in% c("base", "m4", "m5", "ubi")) %>% 
  mutate(IU = fct_relevel(IU, "Dual income couple with dependent children", after = 10)) %>% mutate(Model = factor(Model, levels = c("m3","base","m5","m4","m1","m2","ubi"))) %>% 
  mutate(Model = fct_relevel(Model, "m4", after = 1)) %>% mutate(Model = fct_relevel(Model, "base", after = 0)) %>%
  ggplot(aes(x = IU, y = equivalised_DispInc, weight = Weight, fill = Model)) + geom_boxplot()+
  theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank())+
  coord_cartesian(ylim = c(0,150000))+
  scale_y_continuous(labels = dollar_format(), )+
  scale_x_discrete(labels = c("Single parent", "Single person"))+
  labs(x = "IU Type", y = "Equivalised disposable income", fill = "Model")
  theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_blank(), axis.text.x = element_text(size = 12))
ggsave("singles.png", width = 8, height = 4.5, dpi = 1200)


```

Percentage income changes

```{r}
average_changes <- compare_equiv %>%           # HERE I AM CREATING NEW EASIER TO UNDERSTAND INCOME UNITS - have taken out pension age categorisation for now
 mutate(IU = as.factor(case_when(
#    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
#    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 & Num_earning_income < 1 ~ "wCouple without a significant income earner",
    IUTType == 2 & Num_earning_income < 2 ~ "Single income couple without dependent children",
    IUTType == 2 ~ "Dual income couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>%
   mutate(IU_big = as.factor(case_when(
#    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
#    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 ~ "Couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>%
  select(IUID, DispInc, Weight, age, famsize, PrivInc, IU, IU_big, equivalised_DispInc, Weight) %>%
  pivot_wider(id_cols = c(IUID, IU, IU_big, age, famsize, PrivInc, Weight), names_from = Model, values_from = equivalised_DispInc) %>%
  select(IUID, IU, IU_big, age, famsize, PrivInc, Weight, base, m5, ubi, m1, m2, m3, m4) %>%
  filter(!is.na(m5), !is.na(ubi), !is.na(base), !is.na(m3), !is.na(m4), !is.na(m1), !is.na(m2)) %>%
  mutate(change_ubi = (ubi-base)/base, change_m5 = (m5-base)/base, better_off_ubi = ifelse(ubi>base, 1, 0), better_off_m5 = ifelse(m5>base, 1, 0))

average_changes %>% filter(IU_big == "Couple without dependent children") %>% ggplot(aes(x = base, y = ubi, color = IU)) + geom_point(aes(alpha = Weight))+ 
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Existing policy", y = "Model 6: UBI", color = "Couple type")+
  scale_colour_hue(labels = c("Dual income couple", "Single income couple", "No significant income earners"))+guides(alpha = "none")+
  scale_alpha(range = c(0.3,1))+
  theme(legend.text = element_text(size = 11))

```

Quickly producing graph results:

```{r}
setwd("C:/Users/PeterLimerick/Desktop/CAPITA-master/Output analysis/Comparison graphs for couples")


model_names <- c("m1", "m2", "m3", "m4", "m5", "ubi")

model_comp <- average_changes %>% pivot_longer(cols = m5:m4, names_to = "Model", values_to = "Equiv_disp") %>% 
  mutate(Model = factor(Model, levels = c("m1", "m2", "m3", "m4", "m5", "ubi")))

model_comp <- model_comp %>% mutate(Model_n = case_when(
  Model == "m1" ~ "Model 1",
  Model == "m2" ~ "Model 2",
  Model == "m3" ~ "Model 3",
  Model == "m4" ~ "GMI",
  Model == "m5" ~ "Model 5",
  Model == "ubi" ~ "UBI"
))

for (i in model_names){
  model_comp %>% 
    filter(IU_big == "Couple without dependent children",
           Model == i) %>%
    ggplot(aes(x = base, y = Equiv_disp, color = IU)) + geom_point(aes(alpha = Weight))+ 
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Existing policy", y = paste0("Model ",i), color = "Couple type")+
  scale_colour_hue(labels = c("Dual income couple", "Single income couple", "No significant income earners"))+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11))
ggsave(paste0("coupletypes_existing_",i,".jpg"), width = 7.29, height = 4.5, dpi = 600)
}

model_comp %>% 
    filter(Model_n %in% c("UBI", "GMI"), IU_big == "Couple without dependent children") %>%
    ggplot(aes(x = base, y = Equiv_disp, color = IU)) + geom_point(aes(alpha = Weight))+ 
  facet_wrap(~Model_n, nrow = 3, ncol = 2)+
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised household income under existing policy", y = "Equivalised household income under BI model", color = "Couple type")+
  scale_colour_hue(labels = c("Dual income couple", "Single income couple", "No significant income earners"))+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11), legend.position = "bottom", legend.direction = "vertical")
ggsave("coupletypes_comp.png",  width = 9, height = 6, dpi = 600)

setwd("C:/Users/PeterLimerick/Desktop/CAPITA-master/Output analysis/Comparison graphs between IU types")

for (i in model_names){
  model_comp %>% 
    filter(IU_big %in% c("Couple without dependent children", "Couple with dependent children", "Lone person"),
           Model == i) %>%
    ggplot(aes(x = base, y = Equiv_disp, color = IU_big)) + geom_point(aes(alpha = Weight))+ 
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Existing policy", y = paste0("Model ",i), color = "Income unit type")+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11))
ggsave(paste0("IUtypes_existing_",i,".jpg"), width = 7.29, height = 4.5, dpi = 600)
}

model_comp %>% 
    filter(Model_n %in% c("UBI", "GMI"), IU_big %in% c("Couple without dependent children",  "Lone person")) %>%
    ggplot(aes(x = base, y = Equiv_disp, color = IU_big)) + geom_point(aes(alpha = Weight))+
  facet_wrap(~Model_n, nrow = 3, ncol = 2)+
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised household income under existing policy", y = "Equivalised household income under BI model", color = "Income unit type")+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11), legend.position = "bottom", legend.direction = "vertical")
ggsave("IUtypes_nochild_comp.png", width = 9, height = 6, dpi = 600)

model_comp %>% 
    filter(Model_n %in% c("UBI", "GMI"), IU_big %in% c("Couple with dependent children", "Lone parent with dependent children")) %>%
    ggplot(aes(x = base, y = Equiv_disp, color = IU_big)) + geom_point(aes(alpha = Weight))+
  facet_wrap(~Model_n, nrow = 3, ncol = 2)+
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised household income under existing policy", y = "Equivalised household income under BI model", color = "Family type")+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11), legend.position = "bottom", legend.direction = "vertical")
ggsave("IUtypes_child_comp.png", width = 9, height = 6, dpi = 600)

setwd("C:/Users/PeterLimerick/Desktop/CAPITA-master/Output analysis/Comparison graphs between number of children")

model_comp <- model_comp %>% mutate(num_children = as.factor(case_when(
  famsize == 2 ~ "None",
  famsize == 3 ~ "1",
  famsize == 4 ~ "2",
  famsize >= 5 ~ "3 or more"
)))

for (i in model_names){
  model_comp %>% 
    filter(IU_big %in% c("Couple without dependent children", "Couple with dependent children"),
           Model == i,
           famsize != 2) %>%
    ggplot(aes(x = base, y = Equiv_disp, color = num_children)) + geom_point(aes(alpha = Weight))+ 
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Existing policy", y = paste0("Model ",i), color = "Number of children")+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11))
ggsave(paste0("Numberchildren_existing_",i,".jpg"), width = 7.29, height = 4.5, dpi = 600)
}

model_comp %>% 
    filter(Model_n %in% c("UBI", "GMI"), IU_big %in% c("Couple without dependent children", "Couple with dependent children"),
           famsize != 2) %>%
    ggplot(aes(x = base, y = Equiv_disp, color = num_children)) + geom_point(aes(alpha = Weight))+ 
  facet_wrap(~Model_n, nrow = 3, ncol = 2)+
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised household income under existing policy", y = "Equivalised household income under BI model", color = "Number of children")+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11), legend.position = "bottom")
ggsave("Numberchildren_comp.png", width = 9, height = 6, dpi = 600)

model_comp %>% 
    filter(Model_n %in% c("UBI", "GMI"), famsize < 3) %>% mutate(Pension_age = ifelse(age >= 66, "Above pension age","Below pension age"),
                                                                 famsize = ifelse(famsize == 1, "Single", "Couple")) %>%
    ggplot(aes(x = base, y = Equiv_disp, color = Pension_age)) + geom_point(aes(alpha = Weight))+ 
  facet_wrap(famsize ~ Model_n, nrow = 2, ncol = 2)+
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised household income under existing policy", y = "Equivalised household income under BI model", color = "Average income unit age")+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11), legend.position = "bottom")
ggsave("Pension_comp.png", width = 9, height = 8, dpi = 600)

model_comp %>% 
    filter(Model_n %in% c("GMI", "UBI") & age <= 65) %>%
    ggplot(aes(x = base, y = Equiv_disp, color = IU_big)) + geom_point(aes(alpha = Weight))+ 
  facet_wrap(~Model_n, nrow = 3, ncol = 2)+
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised household income under existing policy", y = "Equivalised household income under BI model", color = "Household type")+guides(alpha = "none")+
  scale_alpha(range = c(0.05,1))+
  theme(legend.text = element_text(size = 11), legend.position = "bottom", legend.direction = "vertical")
ggsave("Households under pension age comp.png", width = 9, height = 7, dpi = 600)

setwd("C:/Users/PeterLimerick/Desktop/CAPITA-master/Output analysis")

write.csv(average_changes, "avg changes.csv")

```
# Regressions:

```{r}
library(sjPlot)
library(sjmisc)
library(sjlabelled)

regression_data <- average_changes %>% left_join(compare %>% filter(Model == "base") %>% group_by(IUID) %>% summarise(IUID = median(IUID), Pensions = sum(!is.na(PenType)))) %>%
                                                   mutate(Receives_pension = ifelse(Pensions >= 1, 1, 0), 
                                                          IU_big = as.character(IU_big),
                                                          IU = case_when(
                                                          Pensions >= 1 & IU_big == "Lone person" ~ "Single above pension age",
                                                          Pensions >= 1 ~ "Couple above pension age",
                                                          TRUE ~ IU_big
                                                          )) %>%
  mutate(IU_big = fct_infreq(as.factor(IU_big)), IU = fct_relevel(fct_infreq(as.factor(IU)), "Single above pension age", after = 5)) %>%
  mutate(IU = fct_relevel(IU, "Couple above pension age", after = 5)) %>%
  rename(UBI = ubi, GMI = m4)
                                                          

mm51 <- lm(GMI ~ base, data = regression_data, weights = Weight)
mm52 <- lm(GMI ~ base + IU, data = regression_data, weights = Weight)
mm53 <- lm(GMI ~ base + IU_big + Receives_pension, data = regression_data, weights = Weight)

tab_model(mm51, mm52, show.ci = FALSE, collapse.se = TRUE,
          pred.labels = c("Intercept", "Current equivalised disposable income", "IU type: Working-age couple without dependent children", "IU type: Working-age couple with dependent children", "IU type: Working-age single parent", "IU type: Single above pension age", "IU type: Couple above pension age"))

mubi1 <- lm(UBI ~ base, data = regression_data, weights = Weight)
mubi2 <- lm(UBI ~ base + IU, data = regression_data, weights = Weight)
mubi3 <- lm(UBI ~ base + IU_big + Receives_pension, data = regression_data, weights = Weight)

tab_model(mm51, mm52, mubi1, mubi2, show.ci = FALSE, collapse.se = TRUE,
          pred.labels = c("Intercept", "Current equivalised disposable income", "IU type: Working-age couple without dependent children", "IU type: Working-age couple with dependent children", "IU type: Working-age single parent", "IU type: Single above pension age", "IU type: Couple above pension age"), p.style = "stars", digits = 3)

mm51 <- lm(GMI ~ base, data = regression_data, weights = Weight)
mm52 <- lm(GMI ~ base + IU+0, data = regression_data, weights = Weight)
mubi1 <- lm(UBI ~ base, data = regression_data, weights = Weight)
mubi2 <- lm(UBI ~ base + IU+0, data = regression_data, weights = Weight)

tab_model(mm51, mm52, mubi1, mubi2, show.ci = FALSE, collapse.se = TRUE,
          pred.labels = c("Intercept", "Current equivalised disposable income", "IU type: Working-age lone person", "IU type: Working-age couple without dependent children", "IU type: Working-age couple with dependent children", "IU type: Working-age single parent", "IU type: Single above pension age", "IU type: Couple above pension age"), p.style = "stars", digits = 3)

```

# Other less-important things...

BOXPLOT

```{r}
compare_equiv %>% mutate(Model = as.factor(Model)) %>% mutate(Model = fct_relevel(Model, "m5", after = 5)) %>%
  ggplot(aes(x = Model, y = equivalised_DispInc, weight = Weight))+geom_boxplot()+
    theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank())+
  coord_cartesian(ylim = c(0,150000))+
  scale_y_continuous(labels = dollar_format(), )+
  scale_x_discrete(labels = c("Existing policy","Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"))+
  labs(x = "IU Type", y = "Equivalised disposable income", fill = "Model")+
  theme(axis.text.x = element_text(size = 11))
ggsave("boxplots_all_models.png", width = 8, height = 4.5, dpi = 1200)
```

```{r}
add_priv <- compare_equiv %>%           # HERE I AM CREATING NEW EASIER TO UNDERSTAND INCOME UNITS
 mutate(IU = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 & Num_earning_income < 1 ~ "wCouple without a significant income earner",
    IUTType == 2 & Num_earning_income < 2 ~ "Single income couple without dependent children",
    IUTType == 2 ~ "Dual income couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>%
   mutate(IU_big = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 ~ "Couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>%
  mutate(equivalised_PrivInc = PrivInc/equivalence_factor) %>%
  select(IUID, DispInc, Weight, age, IU, IU_big, equivalised_PrivInc, Weight) %>%
  pivot_wider(id_cols = c(IUID, IU, IU_big, age, Weight), names_from = Model, values_from = equivalised_PrivInc) %>%
  select(IUID, IU, IU_big, age, Weight, base)

average_changes <- average_changes %>% cbind(add_priv$base) %>% rename(equivalised_PrivInc = `add_priv$base`)



average_changes %>% filter(IU_big == "Couple without dependent children") %>% ggplot(aes(x = equivalised_PrivInc, y = base, color = IU)) + geom_point(aes(alpha = Weight))+ 
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised private income", y = "Current policy", color = "Couple type")+
  scale_colour_hue(labels = c("Dual income couple", "Single income couple", "No significant income earners"))+guides(alpha = "none")+
  scale_alpha(range = c(0.3,1))

average_changes %>% filter(IU_big == "Couple without dependent children") %>% ggplot(aes(x = equivalised_PrivInc, y = m4, color = IU)) + geom_point(aes(alpha = Weight))+ 
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised private income", y = "partner-tested GMI", color = "Couple type")+
  scale_colour_hue(labels = c("Dual income couple", "Single income couple", "No significant income earners"))+guides(alpha = "none")+
  scale_alpha(range = c(0.3,1))

average_changes %>% filter(IU_big == "Couple without dependent children") %>% ggplot(aes(x = equivalised_PrivInc, y = ubi, color = IU)) + geom_point(aes(alpha = Weight))+ 
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised private income", y = "ubi", color = "Couple type")+
  scale_colour_hue(labels = c("Dual income couple", "Single income couple", "No significant income earners"))+guides(alpha = "none")+
  scale_alpha(range = c(0.3,1))

average_changes %>% filter(IU_big == "Lone person") %>% filter(age > 18) %>% ggplot(aes(x = equivalised_PrivInc, y = m5, color = IU)) + geom_point(aes(alpha = Weight))+ 
  geom_abline(linetype = 2)+
  coord_equal(xlim = c(0,100000), ylim = c(0,100000))+
  theme_minimal()+
  scale_y_continuous(labels = dollar_format())+scale_x_continuous(labels = dollar_format())+
  labs(x = "Equivalised private income", y = "ubi", color = "Couple type")+
  scale_colour_hue(labels = c("Dual income couple", "Single income couple", "No significant income earners"))+guides(alpha = "none")+
  scale_alpha(range = c(0.3,1))

```

Inequality in terms of means

```{r}

#Ratio of geometric to arithmetic mean

geo_ari_inequality <- compare_equiv %>% mutate(IU = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 ~ "Couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>% 
 mutate(equivalised_DispInc = case_when(
    equivalised_DispInc <= 1 ~ 1,
    TRUE ~ equivalised_DispInc)) %>%
  group_by(Model, IU) %>%
  summarise(
    geometric_mean = weighted.geomean(equivalised_DispInc,  Weight),
    arithmetic_mean = weighted.mean(equivalised_DispInc,  Weight)
  ) %>%
  mutate(inequality = geometric_mean/arithmetic_mean) %>% ungroup()

geo_ari_inequality_totals <- compare_equiv %>% mutate(IU = "Total") %>%
  mutate(equivalised_DispInc = case_when(
    equivalised_DispInc <= 1 ~ 1,
    TRUE ~ equivalised_DispInc)) %>%
  group_by(Model, IU) %>%
  summarise(
    geometric_mean = weighted.geomean(equivalised_DispInc,  Weight),
    arithmetic_mean = weighted.mean(equivalised_DispInc,  Weight)
  ) %>%
  mutate(inequality = geometric_mean/arithmetic_mean) %>% ungroup()

geo_ari_inequality <- geo_ari_inequality %>% add_row(geo_ari_inequality_totals) %>% arrange(Model, IU)

geo_ari_inequality_table <- geo_ari_inequality %>% select(Model, IU, inequality) %>% pivot_wider(names_from = Model, values_from = inequality)

geo_inequality <- compare_equiv %>% mutate(IU = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 ~ "Couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>% 
 mutate(equivalised_DispInc = case_when(
    equivalised_DispInc <= 1 ~ 1,
    TRUE ~ equivalised_DispInc)) %>%
  group_by(Model, IU) %>%
  summarise(
    geometric_mean = weighted.geomean(equivalised_DispInc,  Weight),
    arithmetic_mean = weighted.mean(equivalised_DispInc,  Weight)
  ) %>%
  mutate(geometric_mean_inc = geometric_mean/arithmetic_mean) %>% ungroup()

geo_inequality_totals <- compare_equiv %>% mutate(IU = "Total") %>%
  mutate(equivalised_DispInc = case_when(
    equivalised_DispInc <= 1 ~ 1,
    TRUE ~ equivalised_DispInc)) %>%
  group_by(Model, IU) %>%
  summarise(
    geometric_mean = weighted.geomean(equivalised_DispInc,  Weight),
    arithmetic_mean = weighted.mean(equivalised_DispInc,  Weight)
  ) %>%
  mutate(geometric_mean_inc = geometric_mean/arithmetic_mean) %>% ungroup()

geo_inequality <- geo_inequality %>% add_row(geo_inequality_totals) %>% arrange(Model, IU)

geo_inequality_table <- geo_inequality %>% select(Model, IU, geometric_mean_inc) %>% pivot_wider(names_from = Model, values_from = geometric_mean_inc)

wb <- loadWorkbook("Outputs/geo_ari_inequality.xlsx")
writeData(wb, sheet = "Data", geo_ari_inequality_table)
writeData(wb, sheet = "Data2", geo_inequality_table)
saveWorkbook(wb, "Outputs/geo_ari_inequality.xlsx", overwrite = T)

```

```{r}

compare_equiv %>% group_by(Model) %>% mutate(equivalised_DispInc = case_when(
    equivalised_DispInc <= 1 ~ 1,
    TRUE ~ equivalised_DispInc)) %>%
  summarise(median_disp_inc = weighted.median(DispInc, Weight),
            median_equiv_inc = weighted.median(equivalised_DispInc, Weight), 
            mean_equiv_inc = weighted.mean(equivalised_DispInc, Weight), 
            geomean_equiv_inc = weighted.geomean(equivalised_DispInc, Weight), 
            geometric_arithmetic_ratio = geomean_equiv_inc/mean_equiv_inc
  )
            

```


